4 Genomic data

4.1 Read duplicates

all_data %>%
    select(dataset,Extraction,duplicates,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(duplicates), sd(duplicates))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of fraction of duplicated reads")
tinytable_8qfrsanq39own0cxbbpv
Mean and standard deviation of fraction of duplicated reads
Taxon ZYMO DREX EHEX
Amphibian 0.3±0.2 0.2±0.2 0.2±0.2
Reptile 0.5±0.4 0.3±0.3 0.4±0.4
Mammal 0.4±0.4 0.2±0.1 0.2±0.2
Bird 0.8±0.3 0.9±0.1 0.6±0.4
Control 1.0±0.0 0.9±0.0 1.0±0.0
all_data %>%
    select(dataset,Extraction,duplicates,Taxon, Species) %>%
    mutate(duplicates=duplicates*100) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=duplicates, color=Species, group=Extraction)) + 
        scale_y_reverse() +
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Duplication rate (%)",x="Extraction method")

all_data %>%
    select(dataset,Sample,Species,Extraction,duplicates,Taxon) %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(duplicates ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_il02xm1hwlqvsesjys67
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.48640693 0.07257915 6.701744 14.33359 8.937456e-06
fixed NA ExtractionDREX -0.09279626 0.03900562 -2.379048 160.72377 1.853073e-02
fixed NA ExtractionEHEX -0.14618630 0.03918499 -3.730671 160.73104 2.644423e-04
ran_pars Sample sd__(Intercept) 0.00000000 NA NA NA NA
ran_pars Species sd__(Intercept) 0.23148121 NA NA NA NA
ran_pars Residual sd__Observation 0.21005171 NA NA NA NA

4.2 Depth of coverage

all_data %>%
    select(dataset,Extraction,coverage_depth,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(coverage_depth), sd(coverage_depth))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of fraction of duplicated reads")
tinytable_ot5v9f6wejam5rf4b4ik
Mean and standard deviation of fraction of duplicated reads
Taxon ZYMO DREX EHEX
Amphibian 0.0±0.0 0.0±0.0 0.0±0.0
Reptile 0.2±0.3 0.1±0.1 0.1±0.1
Mammal 0.7±1.2 0.3±0.4 0.5±1.0
Bird 0.4±0.8 0.6±0.5 0.8±0.8
Control 0.0±0.0 0.0±0.0 0.0±0.0
all_data %>%
    select(dataset,Extraction,coverage_depth,Taxon, Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=coverage_depth, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Depth of coverage",x="Extraction method")

all_data %>%
    select(dataset,Sample,Species,Extraction,coverage_depth,Taxon) %>%
    unique() %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(coverage_depth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_klelp5tidrp8zjzm7ksh
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.329125000 0.1331063 2.47264850 20.58494 0.02222872
fixed NA ExtractionDREX -0.089791667 0.1144640 -0.78445305 48.00000 0.43662873
fixed NA ExtractionEHEX -0.002791667 0.1144640 -0.02438903 48.00000 0.98064341
ran_pars Sample sd__(Intercept) 0.408634577 NA NA NA NA
ran_pars Species sd__(Intercept) 0.224731208 NA NA NA NA
ran_pars Residual sd__Observation 0.396515070 NA NA NA NA

4.3 Breadth of coverage

all_data %>%
    select(dataset,Extraction,coverage_breadth,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(coverage_breadth), sd(coverage_breadth))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of depth of host genome coverage")
tinytable_869by62zm9jrbdl8ox9n
Mean and standard deviation of depth of host genome coverage
Taxon ZYMO DREX EHEX
Amphibian 0.0±0.0 0.0±0.0 0.0±0.0
Reptile 4.9±7.5 3.0±5.4 2.9±3.7
Mammal 5.7±5.9 10.2±16.4 15.1±26.4
Bird 0.6±0.5 3.2±4.4 8.9±13.9
Control 0.0±0.0 0.0±0.0 0.0±0.0
all_data %>%
    select(dataset,Extraction,coverage_breadth,Taxon,Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=coverage_breadth, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Breadth of coverage (%)",x="Extraction method")

all_data %>%
    select(dataset,Extraction,Sample,Species,coverage_breadth,Taxon) %>%
    unique() %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(coverage_breadth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_3yllnc0yqjzr5rw9y0wq
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 2.799167 2.252826 1.2425133 20.82558 0.22785670
fixed NA ExtractionDREX 1.301250 1.956426 0.6651158 48.00000 0.50915975
fixed NA ExtractionEHEX 3.918750 1.956426 2.0030144 48.00000 0.05084021
ran_pars Sample sd__(Intercept) 7.118462 NA NA NA NA
ran_pars Species sd__(Intercept) 3.549767 NA NA NA NA
ran_pars Residual sd__Observation 6.777259 NA NA NA NA

4.4 Breadth vs. coverage

all_data_log <- all_data %>%
    mutate(coverage_breadth_log=log(coverage_breadth)) %>%
    mutate(coverage_depth_log=log(coverage_depth)) 

lm_eq <- lm(coverage_breadth_log ~ coverage_depth_log, data = all_data_log %>% filter(coverage_depth_log != -Inf ,coverage_breadth_log != -Inf))
coef <- coef(lm_eq)
all_data_log$coverage_breadth_log_pred <- coef[1] + coef[2] * all_data_log$coverage_depth_log


all_data_log %>%
    select(dataset,Extraction,coverage_depth_log,coverage_breadth_log,coverage_breadth_log_pred,Taxon,Species,Sample) %>%
    unique() %>%
    ggplot(aes(x=coverage_depth_log,y=coverage_breadth_log)) + 
        geom_point(aes(color=Species, shape=Extraction),size=3, alpha=0.9) + 
        geom_segment(aes(x = coverage_depth_log, y = coverage_breadth_log, xend = coverage_depth_log, yend = coverage_breadth_log_pred, color=Species), alpha=0.9)+
        geom_smooth(method = lm, se = FALSE, color="#666666") +
        scale_color_manual(values=vertebrate_colors) +
        theme_minimal() +
        labs(y="Breadth of coverage (%)",x="Depth of coverage")